/*This file processes raw sensor data to create statistical features used to produce the results in 
"Wrist-Measured Gunshot Example from Loeffler (2014), "Detecting Gunshots Using Wearable Accelerometers"
The underlying code was written in Spring of 2014 by Charles Loeffler.*/

/*This macro includes the feature creation code that is run on each raw data file*/
%MACRO process_datasets(mdataset);
	libname prc "S:\Loeffler\Gunshot\Data\Processed\"; run;
	proc import datafile="&mdataset" out=gunshot1 dbms=csv replace;
    getnames=no;
	run;

/*Rename variables*/
	data gunshot2(compress=yes); set gunshot1;
		rename VAR1=reltime VAR2=ax VAR4=az;
		if &i = 2 then ay = -VAR3;/*Sensor worn inverted on left-hand for control subject 2*/
		else ay = VAR3;
	run;
/*Delete working files*/
	proc datasets library=work;
		delete gunshot1;
	run;
/*Generate flag for 1.5G spikes*/
	data gunshot3 (compress=yes); set gunshot2;
		if abs(previous_ax-previous2_ax)<1.5 & abs(ax-previous_ax)>=1.5 then forward_flag=1;
		else forward_flag=.;

		previous2_ax = previous_ax;
		previous_ax = ax;
		drop previous_ax previous2_ax;
		retain previous_ax(0) previous2_ax(0);
	run;
/*Delete working files*/
	proc datasets library=work;
		delete gunshot2;
	run;
/*Add observation counter from 1.5G spike location*/
	data gunshot4(compress=yes); set gunshot3;
		if reltime>1 & forward_flag & count eq .  then do;
			spike_flag = 1;
			count = 0;
		end;
		else if count ne . and count<199 then count = count+1;
		else do;
			count = .;
			spike_flag=.;
		end;
		retain count(.) spike_flag(0);
	run;
/*Delete working files*/
	proc datasets library=work;
		delete gunshot3;
	run;
/*Reverse sort the observations*/
	proc sort data=gunshot4; by descending reltime;
	run;
/*Add pre-spike observation counter*/
	data gunshot5(compress=yes); set gunshot4(keep=count ax ay az reltime);
		if count eq 0 then do;
			next_count = count - 1;
		end;
		else if -25<=next_count and next_count ne . then do;
			count = next_count;
			next_count = next_count - 1;
		end; 
		else next_count = .;
		retain next_count(.);
	run;
/*Delete working files*/
	proc datasets library=work;
		delete gunshot4;
	run;
/*Drop all non spike window observations*/
	data gunshot6(compress=yes); set gunshot5;
		if count ne .;
	run;
/*Delete working files*/
	proc datasets library=work;
		delete gunshot5;
	run;
/*Resort data in time order*/
	proc sort data=gunshot6; by count reltime;
	run;
/*Add spike window counter to file*/
	data gunshot7(compress=yes); set gunshot6;
		by count;
		if first.count then window = 1;
		else window = window + 1;
		retain window(.);
	run;
/*Create average features for graphs*/
	%if &i eq 0 %then %do;
		data gunshot_temp(compress=yes); set gunshot7;
			count = count+26;
			if count<=201;
		run;
		/*Get right-hand gunshot ranges*/
		proc import datafile="S:\Loeffler\Gunshot\Data\data_ranges_training.csv" out=data_ranges dbms=csv replace;
    		getnames=yes;
		run;
		/*Add labels*/
		proc sql;
		create table dtemp(compress=yes) as
		select s1.reltime,ax,ay,az,window,count,start,stop,weapon_id
		from gunshot_temp s1, data_ranges s2
		where reltime>=start and reltime<=stop and handgrip in ("rh1","rh2");
		quit;
		proc means data = dtemp;
		class count;
		var ax ay az;
		output out= prc.avg_features mean=avg_ax avg_ay avg_az;
		run;
		proc export outfile="S:\Loeffler\Gunshot\Data\Processed\avg_features_201.csv" data=prc.avg_features dbms=csv replace;
		run;
		data dtemp2(compress=yes); set dtemp;
			keep reltime window count ax ay az;
		proc export outfile="S:\Loeffler\Gunshot\Data\Processed\training_long_201.csv" data=dtemp dbms=csv replace;
		run;
		proc datasets library=work;
		delete dtemp dtemp2 gunshot_temp;
		run;
	%end;
/*Re-center observation counter and add combined acceleration vector*/
	data gunshot8(compress=yes); set gunshot7;
		count = count+26;
		avec = sqrt(ax*ax+ay*ay+az*az);
		if 1<=count<=176;
		keep ax ay az window count avec reltime;
	run;
/*Resort by window and observation counter*/
	proc sort data=gunshot8; by window count;
	run;
/*Delete working files 6-7*/
	proc datasets library=work;
		delete gunshot6-gunshot7;
	run;
/*Create Average features*/
	%if &i eq 0 %then %do;
		/*Get right-hand gunshot ranges*/
		proc import datafile="S:\Loeffler\Gunshot\Data\data_ranges_training.csv" out=data_ranges dbms=csv replace;
    		getnames=yes;
		run;
		/*Add labels*/
		proc sql;
		create table dtemp(compress=yes) as
		select s1.reltime,ax,ay,az,window,count,start,stop
		from gunshot8 s1, data_ranges s2
		where reltime>=start and reltime<=stop and handgrip in ("rh1","rh2");
		quit;

		proc means data = dtemp;
		class count;
		var ax ay az;
		output out= prc.avg_features mean=avg_ax avg_ay avg_az;
		run;
		proc export outfile="S:\Loeffler\Gunshot\Data\Processed\avg_features.csv" data=prc.avg_features dbms=csv replace;
		run;
	
		proc datasets library=work;
		delete dtemp ;
		run;
	%end;


/*Create locally-smoothed regression values*/
	proc loess data=gunshot8 plots=none;
		model ax = count /DIRECT smooth = 0.15;
		by window;
		ods output OutputStatistics=Results;
	run;
	data loess_ax(compress=yes); set Results;
		rename Pred=loess_ax;
		drop SmoothingParameter Obs DepVar;
	run;
	proc loess data=gunshot8 plots=none;
		model ay = count /DIRECT smooth = 0.15;
		by window;
		ods output OutputStatistics=Results;
	run;
	data loess_ay(compress=yes); set Results;
		rename Pred=loess_ay;
		drop SmoothingParameter Obs DepVar;
	run;
	proc loess data=gunshot8 plots=none;
		model az = count /DIRECT smooth = 0.15;
		by window;
		ods output OutputStatistics=Results;
	run;
	data loess_az(compress=yes); set Results;
		rename Pred=loess_az;
		drop SmoothingParameter Obs DepVar;
	run;
/*Transpose arrays*/
	proc transpose data=gunshot8 out=gunshot_wide_ax(compress=yes) prefix=ax;
		by window;
		id count;
		var ax;
	run;
	proc transpose data=gunshot8 out=gunshot_wide_ay(compress=yes) prefix=ay;
		by window;
	id count;
	var ay;
	run;
	proc transpose data=gunshot8 out=gunshot_wide_az(compress=yes) prefix=az;
		by window;
		id count;
		var az;
	run;
	proc transpose data=gunshot8 out=gunshot_wide_avec(compress=yes) prefix=avec;
		by window;
		id count;
		var avec;
	run;
	proc transpose data=gunshot8 out=gunshot_wide_reltime(compress=yes) prefix=reltime;
		by window;
		id count;
		var reltime;
	run;
	proc transpose data=loess_ax out=gunshot_wide_loess_ax(compress=yes) prefix=lax;
		by window;
		id count;
		var loess_ax;
	run;
	proc transpose data=loess_ay out=gunshot_wide_loess_ay(compress=yes) prefix=lay;
		by window;
		id count;
		var loess_ay;
	run;
	proc transpose data=loess_az out=gunshot_wide_loess_az(compress=yes) prefix=laz;
		by window;
		id count;
		var loess_az;
	run;
	proc transpose data=prc.avg_features out=avgfeaturewideax(compress=yes) prefix=avg_ax;
		id count;
		var avg_ax;
	run;
	proc transpose data=prc.avg_features out=avgfeaturewideay(compress=yes) prefix=avg_ay;
		id count;
		var avg_ay;
	run;
	proc transpose data=prc.avg_features out=avgfeaturewideaz(compress=yes) prefix=avg_az;
		id count;
		var avg_az;
	run;
/*Create wide array dataset*/
	data gunshot_wide(compress=yes);
    	merge  gunshot_wide_ax(drop=_name_) gunshot_wide_ay(drop=_name_) gunshot_wide_az(drop=_name_) gunshot_wide_avec(drop=_name_) gunshot_wide_reltime(drop=_name_)
		gunshot_wide_loess_ax (drop=_name_) gunshot_wide_loess_ay (drop=_name_) gunshot_wide_loess_az (drop=_name_);
    	by window;
		file = &i.;
	run;
/*Add average features*/
	data gunshot_wide2(compress=yes); 
		merge gunshot_wide avgfeaturewideax avgfeaturewideay avgfeaturewideaz;
	run;
/*Delete working files*/
	proc datasets library=work;
		delete gunshot_wide;
	run;
/*Calculate window features*/
	data prc.file&i.(compress=yes); set gunshot_wide2;
		array ax(176) ax1-ax176;
		array ay(176) ay1-ay176;
		array az(176) az1-az176;
		array avec(176) avec1-avec176;
		array ex_ax(176) ex_ax1-ex_ax176;
		array loess_ax(176) lax1-lax176;
		array loess_ay(176) lay1-lay176;
		array loess_az(176) laz1-laz176;
		array avg_ax(176) lax1-lax176;
		array avg_ay(176) lay1-lay176;
		array avg_az(176) laz1-laz176;

	/*pre-spike features*/
		largest_prespike = 0;/*Magnitude of largest pre-spike total vector value*/
		largest_prespike_loc = .;/*Location of largest pre-spike total vector value*/
		largest_ch_prespike = 0;/*Magnitude of largest change in pre-spike total vector trend*/
		largest_ch_prespike_loc = .;/*Location of largest change in pre-spike total vector trend*/
		num_gt_1_values = 0;/*Number of samples where total vector is greater than 1*/
		sum_diff_prespike_values = avec1-1;

		do i=2 to 25;
			if avec[i]>largest_prespike then do;
			largest_prespike = avec[i];
			largest_prespike_loc = i-25;
		end;
		if abs(avec[i]-avec[i-1])>largest_ch_prespike then do;
			largest_ch_prespike = abs(avec[i]-avec[i-1]);
			largest_ch_prespike_loc = i-25;
		end;
		if avec[i]>1 then num_gt_1_values= num_gt_1_values + 1;
			sum_diff_prespike_values = sum_diff_prespike_values + avec[i] - 1;
		end;

	/*spike features*/
		spike_magnitude = abs(ax26);		/*magnitude of detected spike*/
		spike_ch_magnitude = abs(ax25-ax26);/*magnitude of change in detected spike*/
		full_spike_magnitude = .;			/*magnitude of full spike (over multiple samples)*/
		full_spike_ch_magnitude = .;		/*magnitude of change in full spike*/
		double_sided = .;					/*flag for whether spike is spiky on both ends*/
		end_spike_loc = .;					/*location of end of spike*/
		over_under = .;						/*was spike both positive and negative*/
		spike_window_count = 1;				/*how many jerk spike changes were detected within spike window*/
		post_spike_window_count = 0;		/*how many jerk spikes changes were detected after spike window*/
		sum_sq_diff_avgax_values = 0;		/*sum of squared differences between ax and average features*/
		sum_sq_diff_avgay_values = 0;		/*sum of squared differences between ay and average features*/
		sum_sq_diff_avgaz_values = 0;		/*sum of squared differences between az and average features*/
		sensor_max_count_values = 0;		/*number of samples with sensor max_out values*/
		num_gt_0_values = 0;				/*number of values greater than zero*/
		recoil_value = 0;					/*Peak recoil value*/
		recoil_loc = 0;						/*Peak recoil location*/
		x_peak_value = 0;					/*X-axis peak value*/
		x_peak_loc = 0;						/*X-axis peak value location*/
		lift_value = 0;						/*Peak Y-axis lift value*/
		lift_loc = 0;						/*Peak Y-axis lift value location*/
		min_z_value = 0;					/*Minimum z value*/
		max_z_value = 0;					/*Maximum z value*/
		min_z_loc = 0;						/*Minimum z value location*/
		max_z_loc = 0;						/*Maximum z value location*/

		do i=27 to 28;
			if ax26<0 & ax[i]<ax26 & ax[i]< ax[i-1] then do;
				full_spike_magnitude = abs(ax[i]);
				full_spike_ch_magnitude = abs(ax[i]-ax25);
			end;
			else if ax26>0 & ax[i]>ax26 & ax[i]> ax[i-1] then do;
				full_spike_magnitude = abs(ax[i]);
				full_spike_ch_magnitude = abs(ax[i]-ax25);
			end;
			else do;
				full_spike_magnitude = abs(ax26);
				full_spike_ch_magnitude = abs(ax26-ax25);
			end;
		end;

		do i=36 to 26 by -1;
			if abs(ax[i]-ax[i+1])<2 & abs(ax[i-1]-ax[i])>2 then do;
				double_sided = 1;
				end_spike_loc = i;
			end;
		end;

		do i = 27 to 176;
			if abs(ax[i]-ax[i-1])>2 and i<=36 then do;
				spike_window_count = spike_window_count + 1;
			end;
			else if abs(ax[i]-ax[i-1])>2 and i>36 then do;
				post_spike_window_count = post_spike_window_count + 1;
			end;
		end;

		do i = 1 to 176;
			if loess_ax[i]<recoil_value then do;
				recoil_value = loess_ax[i];
				recoil_loc = i;
			end;
			if loess_ax[i]>x_peak_value then do;
				x_peak_value = loess_ax[i];
				x_peak_loc = i;
			end;
			if loess_ay[i]>lift_value then do;
				lift_value = loess_ay[i];
				lift_loc = i;
			end;
			if loess_az[i]<min_z_value then do;
				min_z_value = loess_az[i];
				min_z_loc = i;
			end;
			if loess_az[i]>max_z_value then do;
				max_z_value = loess_az[i];
				max_z_loc = i;
			end;
			if loess_ax[i]>0 and i<76 and i>26 then do;
				num_gt_0_values = num_gt_0_values + 1;
			end;
			if abs(ax[i])>15 or abs(ay[i])>15 or abs(az[i])>15 then sensor_max_count_values = sensor_max_count_values + 1;

			sum_sq_diff_avgax_values = sum_sq_diff_avgax_values + (ax[i]-avg_ax[i])**2;
			sum_sq_diff_avgay_values = sum_sq_diff_avgay_values + (ay[i]-avg_ay[i])**2;
			sum_sq_diff_avgaz_values = sum_sq_diff_avgaz_values + (az[i]-avg_az[i])**2;

		end;

	run;
	/*Delete working files*/
	proc datasets library=work;
		delete gunshot_wide2;
	run;
%MEND process_datasets;

/*This macro loops over all ten data files. Files 0 and 6 contain to the gunshot data. All other files contain control subjects.*/
%MACRO loop_through_datasets;
	%DO i = 0 %to 9;
		%process_datasets(S:\Loeffler\Gunshot\Data\file&i..csv);
	%END;
%MEND loop_through_datasets;
%loop_through_datasets;


libname prc "S:\Loeffler\Gunshot\Data\Processed\"; run;
proc import datafile="S:\Loeffler\Gunshot\Data\data_ranges_training.csv" out=data_ranges dbms=csv replace;
    getnames=yes;
run;


/*Add labels*/
proc sql;
create table candidates(compress=yes) as
select reltime27,weapon,handgrip,weapon_id,subject,start,stop
from prc.file0 s1, data_ranges s2
where reltime27>=start and reltime27<=stop;
quit;
proc sort data=candidates; by reltime27;run;
data prc.file0_2(compress=yes); merge prc.file0(IN=one) candidates(IN=two);
by reltime27;
if one=1;
if handgrip in ("rh1","rh2") then gunshot = 1;
else if handgrip in ("lh1","lh2") then gunshot = .;
else gunshot = 0;

if gunshot = 1;
run;

/*Merge training datasets*/
data prc.training_sample(compress=yes); set prc.file0_2 prc.file1-prc.file5;
	array reltime(176) reltime1-reltime176;

	if recoil_loc>45 and recoil_loc<105 then brecoil = 1 ;
	else brecoil = 0;

	if min_z_loc>30 and min_z_loc<80 then bmin_z_recoil = 1;
	else bmin_z_recoil = 0;

	recoil_loc_sq = recoil_loc*recoil_loc;
	min_z_loc_sq = min_z_loc*min_z_loc;

	if gunshot eq . then gunshot = 0;

	/*Drop any spikes with missing values due to shutdown of sensor at conclusion of collection*/
	do i = 1 to 176;
		if reltime[i] = . then delete;
	end;
	drop ax1-ax176 ay1-ay176 az1-az176 avec1-avec176 reltime1-reltime25 reltime27-reltime176 lax1-lax176 lay1-lay176 laz1-laz176 ex_ax1-ex_ax176
		_LABEL_ file i double_sided over_under end_spike_loc window reltime26 weapon handgrip weapon_id subject start stop brecoil bmin_z_recoil
		avg_ax1-avg_ax176 avg_ay1-avg_ay176 avg_az1-avg_az176 _NAME_;

run;

proc export outfile="S:\Loeffler\Gunshot\Data\training_sample.csv" data=prc.training_sample dbms=csv replace;
run;


proc import datafile="S:\Loeffler\Gunshot\Data\Processed\training_long.csv" out=training_long dbms=csv replace;
    getnames=yes;
run;
/*Add labels*/
proc sql;
create table features(compress=yes) as
select s1.reltime,ax,ay,az,window,count,s2.start,s2.stop
from training_long s1, data_ranges s2
where reltime>=s2.start and reltime<=s2.stop and handgrip in ("rh1","rh2");
quit;
proc export outfile="S:\Loeffler\Gunshot\Data\training_long_gunshots.csv" data=features dbms=csv replace;
run;


proc import datafile="S:\Loeffler\Gunshot\Data\data_ranges_test.csv" out=data_ranges dbms=csv replace;
    getnames=yes;
run;
/*Add labels*/
proc sql;
create table candidates(compress=yes) as
select reltime27,weapon,handgrip,weapon_id,subject,start,stop
from prc.file6 s1, data_ranges s2
where reltime27>=start and reltime27<=stop;
quit;
proc sort data=candidates; by reltime27;run;
data prc.file6_2(compress=yes); merge prc.file6(IN=one) candidates(IN=two);
by reltime27;
if one=1;
if handgrip in ("rh1","rh2") then gunshot = 1;
else if handgrip in ("lh1","lh2") then gunshot = .;
else gunshot = 0;

if gunshot = 1;
run;

/*Merge test datasets*/
data prc.test_sample(compress=yes); set prc.file6_2 prc.file7-prc.file9;
	array reltime(176) reltime1-reltime176;
	if recoil_loc>45 and recoil_loc<105 then brecoil = 1 ;
	else brecoil = 0;

	if min_z_loc>30 and min_z_loc<80 then bmin_z_recoil = 1;
	else bmin_z_recoil = 0;

	recoil_loc_sq = recoil_loc*recoil_loc;
	min_z_loc_sq = min_z_loc*min_z_loc;

	if gunshot eq . then gunshot = 0;

	/*Drop any spikes with missing values due to shutdown of sensor at conclusion of collection*/
	do i = 1 to 176;
		if reltime[i] = . then delete;
	end;

	drop ax1-ax176 ay1-ay176 az1-az176 avec1-avec176 reltime1-reltime25 reltime27-reltime176 lax1-lax176 lay1-lay176 laz1-laz176 ex_ax1-ex_ax176
		_LABEL_ file i double_sided over_under end_spike_loc window reltime26 weapon handgrip weapon_id subject start stop brecoil bmin_z_recoil
		avg_ax1-avg_ax176 avg_ay1-avg_ay176 avg_az1-avg_az176 _NAME_;

run;

proc export outfile="S:\Loeffler\Gunshot\Data\test_sample.csv" data=prc.test_sample dbms=csv replace;
run;
